package au.com.acpfg.align.local;
import jaligner.Alignment;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import neobio.alignment.PairwiseAlignment;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataRow;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.DataType;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.DoubleCell;
import org.knime.core.data.def.IntCell;
import org.knime.core.data.def.StringCell;
import org.knime.core.data.vector.bitvector.DenseBitVector;
import org.knime.core.data.vector.bitvector.DenseBitVectorCell;
import org.knime.core.data.vector.bitvector.DenseBitVectorCellFactory;
import org.knime.core.node.InvalidSettingsException;
import pal.alignment.AlignmentUtils;
import au.com.acpfg.align.muscle.MultiAlignmentCell;
public class AlignmentReporter {
private String m_a1, m_a2; // accessions for the pair of sequences
private String m_s1, m_s2; // sequences to be aligned
private MyAlignment m_alignment;
/**
* Constructor to get all non-provider specific fields initialised...
* @param a1
* @param s1
* @param a2
* @param s2
*/
protected AlignmentReporter(String a1, String s1, String a2, String s2) {
m_a1 = a1;
m_a2 = a2;
m_s1 = s1;
m_s2 = s2;
}
/**
* Use this constructor if JAligner is the provider being used.
* @param a
* @param a1
* @param s1
* @param a2
* @param s2
*/
public AlignmentReporter(Alignment a, String a1, String s1, String a2, String s2) {
this(a1, s1, a2, s2);
assert(a != null);
// gotta be careful with namespace pollution argh...
pal.misc.Identifier[] ids = new pal.misc.Identifier[] {
new pal.misc.Identifier(a1),
new pal.misc.Identifier(a2) };
m_alignment = new MyAlignment(ids,
new String[] {new String(a.getSequence1()), new String(a.getSequence2())},
a.getScore());
m_alignment.setTagLine(convert2neobio(a.getMarkupLine()));
}
public AlignmentReporter(Alignment a, String s1, String s2) {
this(a, "s1", s1, "s2", s2);
}
/**
* Use this constructor if NeoBio is the provider being used
* @param a
* @param a1
* @param s1
* @param a2
* @param s2
*/
public AlignmentReporter(PairwiseAlignment a, String a1, String s1, String a2, String s2) {
this(a1, s1, a2, s2);
assert(a != null);
// gotta be careful with namespace pollution argh...
pal.misc.Identifier[] ids = new pal.misc.Identifier[] {
new pal.misc.Identifier(a1),
new pal.misc.Identifier(a2) };
m_alignment = new MyAlignment(ids,
new String[] { a.getGappedSequence1(), a.getGappedSequence2() },
a.getScore());
m_alignment.setTagLine(a.getScoreTagLine());
}
public AlignmentReporter(PairwiseAlignment a, String s1, String s2) {
this(a, "s1", s1, "s2", s2);
}
public static DataTableSpec getTableSpec(String[] wanted) throws InvalidSettingsException {
assert(wanted != null && wanted.length > 0);
ArrayList<DataColumnSpec> cols = new ArrayList<DataColumnSpec>();
for (String want : wanted) {
if (want.equals("Alignment Cell")) {
cols.add(new DataColumnSpecCreator("Alignment Cell", MultiAlignmentCell.TYPE).createSpec());
} else if (want.equals("Accessions")) {
cols.add(new DataColumnSpecCreator("Accession #1", StringCell.TYPE).createSpec());
cols.add(new DataColumnSpecCreator("Accession #2", StringCell.TYPE).createSpec());
} else if (want.equals("Original Sequences")) {
cols.add(new DataColumnSpecCreator("Original Sequence #1", StringCell.TYPE).createSpec());
cols.add(new DataColumnSpecCreator("Original Sequence #2", StringCell.TYPE).createSpec());
} else if (want.equals("Gapped Sequences")) {
cols.add(new DataColumnSpecCreator("Gapped Sequence #1", StringCell.TYPE).createSpec());
cols.add(new DataColumnSpecCreator("Gapped Sequence #2", StringCell.TYPE).createSpec());
} else if (want.equals("Tag Line")) {
cols.add(new DataColumnSpecCreator("Tag Line", StringCell.TYPE).createSpec());
} else if (want.equals("Gap count (Sequence1)") || want.equals("Gap count (Sequence2)")
|| want.equals("Gap count (sum of both sequences)")) {
cols.add(new DataColumnSpecCreator(want, IntCell.TYPE).createSpec());
} else if (want.equals("Identities (%)") || want.startsWith("Similarities ")) {
cols.add(new DataColumnSpecCreator(want, DoubleCell.TYPE).createSpec());
} else if (want.equals("Total Gaps (Sequence1)") ||
want.equals("Total Gaps (Sequence2)")) {
cols.add(new DataColumnSpecCreator(want, IntCell.TYPE).createSpec());
} else if (want.equals("Score")) {
cols.add(new DataColumnSpecCreator(want, DoubleCell.TYPE).createSpec());
} else if (want.endsWith("(BitVector)") || want.startsWith("Gap regions")) {
cols.add(new DataColumnSpecCreator(want, DenseBitVectorCell.TYPE).createSpec());
} else if (want.startsWith("Extent")) {
cols.add(new DataColumnSpecCreator(want, DoubleCell.TYPE).createSpec());
} else if (want.startsWith("Alignment in")) {
cols.add(new DataColumnSpecCreator(want, StringCell.TYPE).createSpec());
} else {
throw new InvalidSettingsException("Unknown alignment datum: "+want);
}
}
return new DataTableSpec(cols.toArray(new DataColumnSpec[0]));
}
public DataRow getRow(int align_no, DataTableSpec spec) throws IOException {
DataCell[] cells = new DataCell[spec.getNumColumns()];
for (int i=0; i<spec.getNumColumns(); i++) {
DataColumnSpec col = spec.getColumnSpec(i);
String task = col.getName().toLowerCase();
if (task.equals("accession #1")) {
cells[i] = new StringCell(m_a1);
} else if (task.equals("accession #2")) {
cells[i] = new StringCell(m_a2);
} else if (task.equals("original sequence #1")) {
cells[i] = new StringCell(getSequence1());
} else if (task.equals("original sequence #2")) {
cells[i] = new StringCell(getSequence2());
} else if (task.equals("gapped sequence #1")) {
cells[i] = new StringCell(getGappedSequence(true));
} else if (task.equals("gapped sequence #2")) {
cells[i] = new StringCell(getGappedSequence(false));
} else if (task.equals("score")) {
cells[i] = new DoubleCell(getScore());
} else if (task.equals("tag line")) {
cells[i] = new StringCell(getTagLine());
} else if (task.equals("total gaps (sequence1)")) {
cells[i] = new IntCell(getGapLength(true));
} else if (task.equals("total gaps (sequence2)")) {
cells[i] = new IntCell(getGapLength(false));
} else if (task.equals("identities (%)")) {
cells[i] = new DoubleCell(((double)getIdentityMatches()) / getTagLine().length() * 100.0);
} else if (task.startsWith("similarities ")) {
cells[i] = new DoubleCell(((double)getSimilarityMatches()) / getTagLine().length()*100.0);
} else if (task.equals("extent (%sequence1)")) {
cells[i] = new DoubleCell(getExtent(true));
} else if (task.equals("extent (%sequence2)")) {
cells[i] = new DoubleCell(getExtent(false));
} else if (task.startsWith("identical regions")) {
cells[i] = getIdentityBitVectorCell();
} else if (task.startsWith("similar regions")) {
cells[i] = getSimilarBitVectorCell();
} else if (task.startsWith("gap regions")) {
cells[i] = getGappedBitVectorCell(task);
} else if (task.startsWith("gap regions")) {
if (task.endsWith("sequence1)")) {
cells[i] = new IntCell(getGapStarts(true));
} else if (task.endsWith("sequence2)")) {
cells[i] = new IntCell(getGapStarts(false));
} else {
int sum = getGapStarts(true);
sum += getGapStarts(false);
cells[i]= new IntCell(sum);
}
} else if (task.startsWith("alignment in blast")) {
cells[i] = new StringCell(get_alignment("blast"));
} else if (task.startsWith("alignment in clustal")) {
cells[i] = new StringCell(get_alignment("clustalw"));
} else if (task.startsWith("alignment in fasta")) {
cells[i] = new StringCell(get_alignment("fasta"));
} else if (task.startsWith("alignment cell")) {
cells[i] = new MultiAlignmentCell(get_alignment("fasta"));
} else {
cells[i] = DataType.getMissingCell();
}
}
return new DefaultRow("A"+align_no, cells);
}
/**
* Returns the alignment in the requested format. TODO: incomplete
*
* @param required_format one of "blast", "clustalw" and "fasta" for now
* @return string representation of this alignment in desired format
*/
private String get_alignment(String required_format) {
StringWriter sw = new StringWriter(100 * 1024);
PrintWriter pw = new PrintWriter(sw);
if (required_format.equals("clustalw")) {
AlignmentUtils.printCLUSTALW(m_alignment, pw);
} else if (required_format.equals("blast")) {
// TODO... produce a proper blast format
AlignmentUtils.printPlain(m_alignment, pw);
} else if (required_format.equals("fasta")) {
for (int i=0; i<m_alignment.getSequenceCount(); i++) {
pw.println(">"+m_alignment.getIdentifier(i).getName());
pw.println(m_alignment.getAlignedSequenceString(i));
}
}
pw.close();
return sw.toString();
}
public double getScore() {
return m_alignment.getScore();
}
public String getGappedSequence(boolean is_1) {
if (is_1) {
return m_alignment.getAlignedSequenceString(0);
} else {
return m_alignment.getAlignedSequenceString(1);
}
}
public String getSequence1() {
return m_s1;
}
public String getSequence2() {
return m_s2;
}
public int getIdentityMatches() {
String tags = getTagLine();
int cnt = 0;
for (int i=0; i<tags.length(); i++) {
if (Character.isLetter(tags.charAt(i))) {
cnt++;
}
}
return cnt;
}
public int getSimilarityMatches() {
String tags = getTagLine();
int cnt = 0;
for (int i=0; i<tags.length(); i++) {
if (tags.charAt(i) == '+')
cnt++;
}
return cnt;
}
public int getGapStarts(boolean is_1) {
String seq = getGappedSequence(is_1);
Pattern p = Pattern.compile("[A-Z]\\-");
Matcher m = p.matcher(seq);
int base = 0;
int cnt = 0;
while (m.find(base)) {
cnt++;
base = m.start() + 1;
}
return cnt;
}
public double getExtent(boolean from_seq1) {
int length = getGappedSequence(from_seq1).length();
int gaps = getGapLength(from_seq1);
assert(gaps < length && gaps >= 0 && length > 0);
int seq_len= from_seq1 ? m_s1.length() : m_s2.length();
return ((double) length - gaps) / seq_len * 100.0;
}
public int getGapLength(boolean from_seq1) {
String gapped_seq = getGappedSequence(from_seq1);
int cnt = 0;
for (int i=0; i<gapped_seq.length(); i++) {
if (gapped_seq.charAt(i) == '-')
cnt++;
}
return cnt;
}
public String getTagLine() {
return m_alignment.getTagLine();
}
protected String convert2neobio(char[] jalign_markup) {
StringBuffer sb = new StringBuffer();
String gseq = getGappedSequence(true);
for (int i=0; i<jalign_markup.length; i++) {
if (jalign_markup[i] == '|') { // identity? if so, replace with letter from gapped sequence
sb.append(gseq.charAt(i));
} else if (jalign_markup[i] == ':') { // similarity? replace with '+'
sb.append('+');
} else if (jalign_markup[i] == '.') { // mismatch? for consistency with neobio we make it blank
sb.append(' ');
} else if (jalign_markup[i] == ' ') { // gap?
sb.append(' ');
} else { // leave untouched, but should not happen!
sb.append(jalign_markup[i]);
}
}
assert(sb.length() == gseq.length());
return sb.toString();
}
public DenseBitVectorCell getIdentityBitVectorCell() {
String tags = getTagLine();
DenseBitVector bv = new DenseBitVector(tags.length());
for (int i=0; i<tags.length(); i++) {
if (Character.isLetter(tags.charAt(i)))
bv.set(i);
}
DenseBitVectorCellFactory f = new DenseBitVectorCellFactory(bv);
return f.createDataCell();
}
public DenseBitVectorCell getSimilarBitVectorCell() {
String tags = getTagLine();
DenseBitVector bv = new DenseBitVector(tags.length());
for (int i=0; i<tags.length(); i++) {
if (tags.charAt(i) == '+')
bv.set(i);
}
DenseBitVectorCellFactory f = new DenseBitVectorCellFactory(bv);
return f.createDataCell();
}
public DenseBitVectorCell getGappedBitVectorCell(String task) {
String seq1 = getGappedSequence(true);
String seq2 = getGappedSequence(false);
DenseBitVector bv = new DenseBitVector(seq1.length());
assert(seq1.length() == seq2.length());
if (task.endsWith("(sequence1)")) {
for (int i=0; i<seq1.length(); i++) {
if (seq1.charAt(i) == '-')
bv.set(i);
}
} else if (task.endsWith("(sequence2)")) {
for (int i=0; i<seq2.length(); i++) {
if (seq2.charAt(i) == '-')
bv.set(i);
}
} else if (task.endsWith("(union)")) {
for (int i=0; i<seq2.length(); i++) {
if (seq2.charAt(i) == '-' || seq1.charAt(i) == '-')
bv.set(i);
}
} else if (task.endsWith("(intersection)")) {
for (int i=0; i<seq2.length(); i++) {
if (seq2.charAt(i) == '-' && seq1.charAt(i) == '-')
bv.set(i);
}
}
DenseBitVectorCellFactory f = new DenseBitVectorCellFactory(bv);
return f.createDataCell();
}
}